{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "rqFcxPYW6YlF",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "ba5dd78c-fc1a-4534-e82d-4c76bf3aad86"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Question: Who is the CEO of Microsoft?\n",
            "Answer: Satya Nadella\n",
            "\n",
            "Question: Where is the headquarters of Google?\n",
            "Answer: Mountain View\n",
            "\n"
          ]
        }
      ],
      "source": [
        "from transformers import AutoTokenizer, TapasForQuestionAnswering\n",
        "import pandas as pd\n",
        "\n",
        "# Checkpoint shared by the tokenizer and the model so the two cannot drift apart\n",
        "MODEL_NAME = \"google/tapas-base-finetuned-wtq\"\n",
        "\n",
        "# Load the TAPAS tokenizer and the table question-answering model\n",
        "tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)\n",
        "model = TapasForQuestionAnswering.from_pretrained(MODEL_NAME)\n",
        "\n",
        "# Define the data for the table\n",
        "data = {\n",
        "    \"Company\": [\"Apple\", \"Microsoft\", \"Google\"],\n",
        "    \"CEO\": [\"Tim Cook\", \"Satya Nadella\", \"Sundar Pichai\"],\n",
        "    \"Headquarters\": [\"Cupertino\", \"Redmond\", \"Mountain View\"]\n",
        "}\n",
        "# Convert the data into a pandas DataFrame. The TAPAS tokenizer expects every cell\n",
        "# to be a string, so cast explicitly in case numeric columns are added later.\n",
        "table = pd.DataFrame.from_dict(data).astype(str)\n",
        "# Define the questions (queries)\n",
        "queries = [\"Who is the CEO of Microsoft?\", \"Where is the headquarters of Google?\"]\n",
        "\n",
        "# Tokenize the table and queries\n",
        "inputs = tokenizer(table=table, queries=queries, padding=\"max_length\", return_tensors=\"pt\")\n",
        "# Make predictions with the model\n",
        "outputs = model(**inputs)\n",
        "\n",
        "# Extract the predicted answer coordinates and aggregation indices\n",
        "predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(\n",
        "    inputs,\n",
        "    outputs.logits.detach(),\n",
        "    outputs.logits_aggregation.detach()\n",
        ")\n",
        "\n",
        "# Aggregation operator names live on the model config, not on the tokenizer\n",
        "# (the tokenizer has no `model` attribute). Fall back to an empty mapping if the\n",
        "# checkpoint's config does not define them.\n",
        "aggregation_labels = model.config.aggregation_labels or {}\n",
        "\n",
        "# Iterate over the queries and print the answers\n",
        "predictions = zip(queries, predicted_answer_coordinates, predicted_aggregation_indices)\n",
        "for query, coordinates, aggregation_index in predictions:\n",
        "    # Each coordinate is a (row, column) pair into the table; a separator with a comma\n",
        "    # keeps multi-word cells such as \"Mountain View\" distinguishable from each other.\n",
        "    cells = \", \".join(table.iat[coordinate] for coordinate in coordinates)\n",
        "    print(f\"Question: {query}\")\n",
        "    if aggregation_index == 0:\n",
        "        # Index 0 means no aggregation: the selected cells are the answer\n",
        "        print(f\"Answer: {cells}\\n\")\n",
        "    else:\n",
        "        # Otherwise report the operator together with the cells it applies to;\n",
        "        # unknown indices are shown as their number instead of raising KeyError\n",
        "        operator = aggregation_labels.get(int(aggregation_index), str(aggregation_index))\n",
        "        print(f\"Answer: {operator} > {cells}\\n\")\n"
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "fsD6Qokq-WN5"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}